#########################################################
####### Vocabularies of Supreme Court Justices     ######
####### Adam Chilton, Kevin Jiang, and Eric Posner ######
#########################################################


##########################################################################
################# Read Me 							    ########################
##########################################################################

# This code replicates the analysis for our Slate article on the vocabularies 
# of Supreme Court Justices. 

#### The Sample ####

# We drew our sample from three different sources.

# First, for current Supreme Court members, we downloaded from Westlaw all of 
# the majority opinions that each justice had written since Justice Kagan joined 
# the court. We then deleted all of the captions, Westlaw formatting, footnotes, 
# and appendixes. We merged all of these opinions into one .txt file for each justice. 

# Second, we downloaded 32 opinions for each of 5 famous prior Supreme Court
# Justices. We decided (somewhat arbitrarily) to download 32 for each justice 
# because it was the number of opinions we had for Scalia. These opinions were
# downloaded from Westlaw after the "most cited" filter had been applied. 

# Third, for the three rappers in our sample (and Shakespeare) we took the number of 
# unique words reported by Matt Daniels, and divided it by 35,000. This is the number 
# of lyrics that Daniels reportedly analyzed for each rapper. Daniels' original post is 
# available here: http://rappers.mdaniels.com.s3-website-us-east-1.amazonaws.com/

#### The Method ####

# We loaded the corpus of opinions into R using the TM package. 

# We then pre-processed the data by: removing stop words, removing whitespace, removing
# punctuation, removing numbers, and converting everything to lower case. 

# We then created a term document matrix, and then for each justice, counted the number of unique 
# terms and the total number of words. Dividing the number of unique words by total words created 
# our index of unique words. 

#### Limitations ####

# We should note that there are obviously some limitations to our method. Here are a few:

# First, the formatting and style of current opinions may be different than it would have 
# been for the 5 famous justices in our sample. For example, if there were dramatically more 
# in-text citations and parentheticals in modern opinions, Justices may be getting "credit" for 
# these unique words. As a result, this is likely not the best way to make comparisons across generations. 

# Second, the "tm" package is good but not perfect. We may be disregarding unique words that we 
# should be counting, or keeping in punctuation that should be disregarded. We know this is a noisy, 
# crude measure. To the extent that these mistakes are randomly distributed across the opinions analyzed,
# this shouldn't impact our underlying result. Of course, there is reason to think that mistakes would 
# not be randomly distributed. For example, they may happen more with current justices than older justices 
# (or vice versa). 

# Third, Daniels likely used different software and pre-processing techniques than we did to get the raw 
# numbers of words and unique words for the rappers and Shakespeare. As a result, these are not perfect
# apples-to-apples comparisons. 

#### Disclaimer & Contact Info ####

# Finally, we should note that this is just rough analysis that was intended to be entertaining. 
# Any questions, comments, or errors should be directed to adamchilton@uchicago.edu. 

#### Instructions #####

# To replicate our analysis, simply run this code in R. 
# The code for our analysis was run in: R 3.0.2 GUI 1.62 Snow Leopard build (6558). 
# We ran this code on June 4, 2014. 

##########################################################################
###################	Installing and Loading Packages	######################
##########################################################################

# Install each required package only when it is not already available, then
# attach it. Calling install.packages() unconditionally would re-download the
# packages (and require network access) on every run of the script.
# system.file(package = ) returns "" when the package cannot be found.
for (pkg in c("tm", "foreign")) {
  if (!nzchar(system.file(package = pkg))) {
    install.packages(pkg)
  }
}
library(tm)
library(foreign)

##########################################################################
################### Loading and Processing the Data	######################
##########################################################################


# Build the corpus from the files in the "Opinions" directory (path is
# relative to the current working directory).
corpus <- Corpus(DirSource("Opinions"))

# Pre-processing. The order of the steps is kept exactly as in the original
# analysis so that the published numbers are reproduced.
# NOTE(review): stop words are removed *before* lower-casing, so capitalised
# stop words (e.g. a sentence-initial "The") presumably survive and are
# counted -- confirm whether that is intended before reordering.
corpus.p <- tm_map(corpus, removeWords, stopwords("english"))  # removes stop words
corpus.p <- tm_map(corpus.p, stripWhitespace)                  # collapses extra whitespace
# tolower() is a plain base function, not a tm transformation; wrapping it in
# content_transformer() keeps the corpus elements valid tm documents
# (requires tm >= 0.6).
corpus.p <- tm_map(corpus.p, content_transformer(tolower))
corpus.p <- tm_map(corpus.p, removeNumbers)
corpus.p <- tm_map(corpus.p, removePunctuation)

dtm <- DocumentTermMatrix(corpus.p)

# as.matrix() yields the full documents-by-terms count matrix. inspect() is a
# display helper: it prints to the console and, in recent tm releases, only
# shows a small sample of the matrix, so it must not be used to extract data.
docTermMatrix <- as.matrix(dtm)

# One row per document:
#   column 1 = number of distinct terms (non-zero entries in the row)
#   column 2 = total number of words (sum of the row)
# Downstream code indexes these columns by position.
Justices <- cbind(
  unique_terms = rowSums(docTermMatrix != 0),
  total_words  = rowSums(docTermMatrix)
)

##########################################################################
###################	Calculating the Unique Word percent	##################
##########################################################################

# Show the per-document counts.
Justices

# Column 1 divided by column 2, computed for every row in one step.
# Rows are picked by position, so the assignments below rely on the row order
# of `Justices` matching this list of names.
unique_share <- Justices[, 1] / Justices[, 2]

alito       <- unique_share[[1]]
brandeis    <- unique_share[[2]]
breyer      <- unique_share[[3]]
frankfurter <- unique_share[[4]]
ginsburg    <- unique_share[[5]]
holmes      <- unique_share[[6]]
jackson     <- unique_share[[7]]
kagan       <- unique_share[[8]]
kennedy     <- unique_share[[9]]
marshall    <- unique_share[[10]]
roberts     <- unique_share[[11]]
scalia      <- unique_share[[12]]
sotomayor   <- unique_share[[13]]
thomas      <- unique_share[[14]]

##########################################################################
###################	Creating the Graph				######################
##########################################################################


# Set up an empty canvas for the one-dimensional "number line" chart:
# one-line margins, no box and no default axes (type = "n" draws no points).
par(mar = c(1, 1, 1, 1))
plot(0, 0, xlim = c(0.07, 0.23), ylim = c(-1.1, 1.0), ylab = " ",
     xlab = "Percent of Unique Words", type = "n",
     frame.plot = FALSE, axes = FALSE)
# Disabled leftover call, kept for reference. It was previously prefixed with
# "%", which is not a comment character in R and made the file unparseable.
# segments(1750, 0, x1 = 3200, y1 = 0)

# Horizontal axis line at y = 0 with an arrowhead at both ends (code = 3).
arrows(0.07, 0, x1 = 0.23, y1 = 0, length = 0.25, angle = 30, code = 3)

# text(0.15, 1.4, "Percent of Unique Words", cex = 2.5)

# Tick marks and labels every two percentage points, from 8% to 22%.
# Ticks are defined in whole percents so the labels are exact strings.
tick_pct <- seq(8, 22, by = 2)
tick_x <- tick_pct / 100
segments(tick_x, -0.05, x1 = tick_x, y1 = 0.05)
text(tick_x, -0.2, paste0(tick_pct, "%"), cex = 1.2)


##########################################################################
###################	Current Justices				######################
##########################################################################


#### Current justices (Kennedy, Roberts, Ginsburg, Sotomayor, Alito, Breyer,
#### Thomas, Kagan, Scalia) ########
# Each justice gets a dot on the axis at their ratio, a dotted stem from the
# axis to `stem_y`, and their name printed at `label_y`. Negative heights
# place the stem and label below the axis.
current_justices <- data.frame(
  x       = c(kennedy, roberts, ginsburg, sotomayor, alito,
              breyer, thomas, kagan, scalia),
  name    = c("Kennedy", "Roberts", "Ginsburg", "Sotomayor", "Alito",
              "Breyer", "Thomas", "Kagan", "Scalia"),
  stem_y  = c(0.5, -0.6, 0.6, 0.3, -0.75, -0.9, -0.49, 0.45, 0.3),
  label_y = c(0.53, -0.63, 0.63, 0.33, -0.78, -0.93, -0.493, 0.48, 0.33),
  stringsAsFactors = FALSE
)
current_col <- "firebrick1"

for (i in seq_len(nrow(current_justices))) {
  justice <- current_justices[i, ]
  points(justice$x, 0, col = current_col, pch = 19, lwd = 5)
  text(justice$x, justice$label_y, justice$name, cex = 1.2, col = current_col)
  segments(justice$x, 0.0, x1 = justice$x, y1 = justice$stem_y,
           col = current_col, lty = 3)
}

##########################################################################
###################	 Historic Justices				######################
##########################################################################

#### Historic justices (Marshall, Brandeis, Frankfurter, Jackson, Holmes) ########
# Same drawing recipe for every entry: a dot on the axis at the justice's
# ratio, the name at `label_y`, and a dotted stem from the axis to `stem_y`.
historic_justices <- data.frame(
  x       = c(marshall, brandeis, frankfurter, jackson, holmes),
  name    = c("Marshall", "Brandeis", "Frankfurter", "Jackson", "Holmes"),
  stem_y  = c(0.3, 0.75, -0.7, 0.45, 0.3),
  label_y = c(0.33, 0.78, -0.73, 0.48, 0.33),
  stringsAsFactors = FALSE
)
historic_col <- "dodgerblue1"

for (i in seq_len(nrow(historic_justices))) {
  entry <- historic_justices[i, ]
  points(entry$x, 0, col = historic_col, pch = 19, lwd = 5)
  text(entry$x, entry$label_y, entry$name, cex = 1.2, col = historic_col)
  segments(entry$x, 0.0, x1 = entry$x, y1 = entry$stem_y,
           col = historic_col, lty = 3)
}


##########################################################################
###################	 Others							######################
##########################################################################

#### Shakespeare, Aesop Rock, DMX, Jay-Z ########
# Ratios for the non-justice comparison points: a reported unique-word count
# divided by 35000.
shakespeare <- 5170 / 35000
aesop <- 7392 / 35000
DMX <- 3214 / 35000
JayZ <- 4506 / 35000

# Every marker, label and stem is positioned from the computed ratio itself.
# Previously DMX was drawn at the rounded literal 0.0918 (leaving `DMX`
# unused) and the other stems ended at rounded literals, which made some
# stems slightly slanted instead of vertical.
others <- data.frame(
  x       = c(shakespeare, aesop, DMX, JayZ),
  name    = c("Shakespeare", "Aesop Rock", "DMX", "Jay-Z"),
  stem_y  = c(-0.35, -0.5, -0.5, 0.9),
  label_y = c(-0.38, -0.53, -0.53, 0.93),
  stringsAsFactors = FALSE
)
others_col <- "springgreen1"

for (i in seq_len(nrow(others))) {
  entry <- others[i, ]
  points(entry$x, 0, col = others_col, pch = 19, lwd = 5)
  text(entry$x, entry$label_y, entry$name, cex = 1.2, col = others_col)
  segments(entry$x, 0.0, x1 = entry$x, y1 = entry$stem_y,
           col = others_col, lty = 3)
}